rm(list = ls())
source("Accepted Paper Replication Archive/code/utils.R")


top_tier <- read_csv("Accepted Paper Replication Archive/clean_data/top_tier_positions.csv") %>%
  as.data.frame()
head(top_tier)
dim(top_tier)

rank_and_file <- read_csv("Accepted Paper Replication Archive/clean_data/rank_and_file.csv") %>%
  as.data.frame()
head(rank_and_file)

## Figure 1  -------------------------------------------


#### get % women in workforce over same period
### source: https://www.dol.gov/agencies/wb/data/facts-over-time/women-in-the-labor-force#civilian-labor-force-by-sex

workforce_gender <- read_csv("Accepted Paper Replication Archive/clean_data/workforce_gender.csv") %>%
  as.data.frame()
head(workforce_gender)
class(workforce_gender$Year)
workforce_gender$Women.in.the.Labor.Force <- as.numeric(gsub(",", "", 
                                                             as.character(workforce_gender$Women.in.the.Labor.Force)))
workforce_gender$Men.in.the..Labor.Force <- as.numeric(gsub(",", "",
                                                            as.character(workforce_gender$Men.in.the..Labor.Force)))
workforce_gender$share_women2 <- workforce_gender$Women.in.the.Labor.Force / 
  (workforce_gender$Women.in.the.Labor.Force + workforce_gender$Men.in.the..Labor.Force)
workforce_gender$share_men2 <- workforce_gender$Men.in.the..Labor.Force / 
  (workforce_gender$Women.in.the.Labor.Force + workforce_gender$Men.in.the..Labor.Force)

workforce_gender$year <- as.numeric(workforce_gender$Year)
workforce_gender

workforce_gender2 <- workforce_gender ## keep all years
workforce_gender <- workforce_gender[workforce_gender$Year >= 1973, ] ## subset


##Load historical data on gender composition of Cabinet
##
cabinet_history <- read_csv("Accepted Paper Replication Archive/clean_data/cabinet_history.csv") %>%
  as.data.frame()
head(cabinet_history)

cabinet_history$pct_fem <- (cabinet_history$Nu_women_miller / cabinet_history$Nu_cabinet_posts_miller) * 100
cabinet_history$pct_fem2 <- (cabinet_history$alt_women / cabinet_history$alt_cabinet_posts) * 100
class(cabinet_history$Year)


# fortune 500 CEOs: source: https://quantic.edu/blog/2021/12/06/how-many-fortune-500-ceos-are-women/
fort <- read_csv("Accepted Paper Replication Archive/clean_data/fort.csv") %>%
  as.data.frame()
head(fort)
fort$fem <- (fort$Women / (fort$Women + fort$Men)) * 100
fort$Year <- as.numeric(as.character(fort$Year))
head(fort)

## grab %chief executives since 2003, when it starts: https://www.bls.gov/cps/tables.htm
## Table 11: Employed persons by detailed occupation, sex, race, and Hispanic or Latino ethnicity

chiefs <- as.data.frame(matrix(ncol = 2, nrow = length(2003:2021)))
colnames(chiefs) <- c("year", "percent")
chiefs$year <- 2003:2021
head(chiefs)

# manually entered from the web site above
fem <- c(
  23.5,
  23.3,
  23.8,
  23.4,
  25.6,
  23.4,
  25,
  25.5,
  24.2,
  27.4,
  26.8,
  26.3,
  27.9,
  27.3,
  28,
  26.9,
  27.6,
  29.3,
  29.1
)
chiefs$percent <- fem

### get loess predictions. hard code to zero if loess predicts a negative value.
m <- loess(cabinet_history$pct_fem ~ cabinet_history$Year, data = cabinet_history, span = .3)
yhat <- predict(m)[order(cabinet_history$Year)]
yhat[yhat < 0] <- 0
m2 <- loess(cabinet_history$pct_fem2 ~ cabinet_history$Year, data = cabinet_history, span = .3)
yhat2 <- predict(m2)[order(cabinet_history$Year)]
yhat2[yhat2 < 0] <- 0
m3 <- loess(workforce_gender2$share_women2 * 100 ~ workforce_gender2$Year, data = workforce_gender2, span = .3)
yhat3 <- predict(m3)[order(workforce_gender2$Year)]
m4 <- loess(fort$fem ~ fort$Year, data = fort, span = .3)
yhat4 <- predict(m4)[order(fort$Year)]
m5 <- loess(percent ~ year, data = chiefs, span = .5)
yhat5 <- predict(m5)[order(chiefs$year)]

# cabinet.pdf
pdf(file = "Accepted Paper Replication Archive/figures/figure_1.pdf")

plot(cabinet_history$Year, cabinet_history$pct_fem,
  axes = F, ylab = "% Women", xlab = "Year",
  pch = 19, cex = .7, col = "black", ylim = c(0, max(c(cabinet_history$pct_fem, cabinet_history$pct_fem2, workforce_gender2$share_women2 * 100)))
)
axis(1, at = seq(1700, 2050, by = 50))
axis(2, at = seq(0, 50, by = 10), las = 1)
lines(cabinet_history$Year, yhat, lwd = 2)
points(cabinet_history$Year, cabinet_history$pct_fem2, pch = 19, col = t_col("#0072B2", percent = 75), cex = .5)
lines(cabinet_history$Year, yhat2, col = "#0072B2", lwd = 3)
points(workforce_gender2$Year, workforce_gender2$share_women2 * 100, pch = 19, col = t_col("#E69F00", percent = 75), cex = .7)
lines(workforce_gender2$Year, yhat3, col = "#E69F00", lwd = 2)
points(fort$Year, fort$fem, pch = 19, col = t_col("red", percent = 75), cex = .7)
lines(fort$Year, yhat4, col = "red", lwd = 2)
points(chiefs$year, chiefs$percent, pch = 19, col = t_col("darkgreen", percent = 75), cex = .7)
lines(chiefs$year, yhat5, col = "darkgreen", lwd = 2)


legend(
  x = 1810, y = 40, legend = c("U.S. Workforce", "Cabinet", "Cabinet Level", "CEOs", "Fortune 500 CEOs"),
  lty = c(1, 1, 1, 1, 1), col = c("#E69F00", "black", "#0072B2", "darkgreen", "red"), cex = 1, lwd = c(2, 2, 2, 2, 2)
)
arrows(x0 = 1900, x1 = 1930, y0 = 13, y1 = 9.5, length = .1)
text(x = 1885, y = 15, labels = "Appointment of Frances Perkins\n Sec. of Labor", cex = .8)

dev.off()



## Figure 2 -------------------------------------------------


#### average top-tier %female by year
res <- summaryBy(fem2 ~ year, data = top_tier, FUN = mean, na.rm = T) ## vacancies not in denominator
res

dim(top_tier[!is.na(top_tier$year) & !is.na(top_tier$fem2), ])

temp <- summaryBy(fem2b ~ year, data = top_tier, FUN = mean, na.rm = T) ## vacancies in denominator
temp

res <- merge(res, temp, by = "year")
head(res)

res$term <- NA
res$term[res$year %in% c(1973:1974)] <- 1
res$term[res$year %in% c(1975:1977)] <- 2
res$term[res$year %in% c(1978:1980)] <- 3
res$term[res$year %in% c(1985:1988)] <- 4
res$term[res$year %in% c(1989:1992)] <- 5
res$term[res$year %in% c(1993:1996)] <- 6
res$term[res$year %in% c(1997:2000)] <- 7
res$term[res$year %in% c(2001:2004)] <- 8
res$term[res$year %in% c(2005:2008)] <- 9
res$term[res$year %in% c(2009:2012)] <- 10
res$term[res$year %in% c(2013:2016)] <- 11
res$term[res$year %in% c(2017:2020)] <- 12
res$term[res$year > 2021] <- 13
table(res$term, exclude = NULL)
head(res)

# code party of WH
res$party <- NA
res$party[res$year %in% c(1973:1976, 1981:1992, 2001:2008, 2017:2020)] <- "rep"
res$party[res$year %in% c(1977:1980, 1993:2000, 2009:2016)] <- "dem"
table(res$party, exclude = NULL)

top_tier$party <- NA
top_tier$party[top_tier$year %in% c(1973:1976, 1981:1992, 2001:2008, 2017:2020)] <- "rep"
top_tier$party[top_tier$year %in% c(1977:1980, 1993:2000, 2009:2016)] <- "dem"
table(top_tier$party, exclude = NULL)


### Compute result for footnote 10
m1 <- lm(fem2 ~ party, data = top_tier)
summary(m1)
temp1 <- coeftest(m1, vcov = vcovCL, cluster = ~year)
temp1

# use only cases where agencies retain the same positions over time
always<-top_tier[top_tier$always == T & !(top_tier$role_status%in%"null_role"), ]
dim(always)
length(unique(always$position))#152 top-tier positions consistently in the panel
dim(always[!is.na(always$fem2),])#about 5400 positions

### Compute analogous result for footnote 10 using positions always present
m1 <- lm(fem2 ~ party, data = always)
summary(m1)
temp1 <- coeftest(m1, vcov = vcovCL, cluster = ~year)
temp1

temp <- summaryBy(fem2 ~ year, data = top_tier[top_tier$always == T, ], FUN = mean, na.rm = T)
colnames(temp) <- c("year", "fem2.mean_common_positions")
head(temp)


## add rowcounts to observations with given years
years <- unique(top_tier$year)
top_tier$size.year <- NA
for (i in 1:length(years)) {
  top_tier$size.year[top_tier$year == years[i]] <- nrow(top_tier[top_tier$year == years[i], ])
}

year.res <- summaryBy(size.year ~ year, data = top_tier)
total <- sum(year.res$size.year.mean)


# counts of women in top-tier posts each year
res.sum <- summaryBy(fem2 ~ year, data = top_tier, FUN = sum, na.rm = T)
res.sum


## same thing by agency AND year
resb <- summaryBy(fem2 ~ year + agency, data = top_tier, FUN = mean, na.rm = T) ## vacancies not in denominator
resb

resb$size <- NA

years <- unique(resb$year)
ags <- unique(resb$agency)


### rank and file positions
years <- unique(rank_and_file$year)
res.all <- as.data.frame(matrix(ncol = 2, nrow = length(years)))
colnames(res.all) <- c("year", "pct_female")
res.all$year <- years

for (i in 1:length(years)) {
  ## compute correction to subtract top tier positions from total
  num1 <- sum(top_tier$fem2[top_tier$year == years[i]], na.rm = T)
  denom1 <- sum(!is.na(top_tier$fem2[top_tier$year == years[i]]))

  num2 <- sum(rank_and_file$total_female[rank_and_file$year == years[i]], na.rm = T) - num1
  denom2 <- sum(rank_and_file$total_emp[rank_and_file$year == years[i]], na.rm = T) - denom1


  res.all$pct_female[res.all$year == years[i]] <- num2 / denom2
}


res.all ## rank and file means
res ## top tier means

# Figure 2
pdf(file = "Accepted Paper Replication Archive/figures/figure_2.pdf")

plot(res$year, res$fem2.mean,
  type = "l", xlab = "Year", ylab = "% Women",
  axes = F, main = "",
  ylim = c(0, .55), lwd = 2, xlim = c(1973, 2021)
)
abline(v = c(1977, 1981, 1993, 2001, 2009, 2017, 2021), lty = 1, col = "grey")
axis(1, at = seq(1970, 2022, by = 4), cex.axis = .7)
axis(2, at = seq(0, .6, by = .1), las = 2, cex = .7, labels = seq(0, .6, by = .1) * 100)
## plot female share of US workforce
lines(workforce_gender$year, workforce_gender$share_women2, lty = 3, lwd = 2, col = "red")

## plot admin means
segments(x0 = 1973, x1 = 1977, y0 = mean(top_tier$fem2[top_tier$year >= 1973 & top_tier$year < 1977], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1973 & top_tier$year < 1977], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 1977, x1 = 1981, y0 = mean(top_tier$fem2[top_tier$year >= 1977 & top_tier$year < 1981], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1977 & top_tier$year < 1981], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 1981, x1 = 1993, y0 = mean(top_tier$fem2[top_tier$year >= 1981 & top_tier$year < 1993], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1981 & top_tier$year < 1993], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 1993, x1 = 2001, y0 = mean(top_tier$fem2[top_tier$year >= 1993 & top_tier$year < 2001], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1993 & top_tier$year < 2001], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 2001, x1 = 2009, y0 = mean(top_tier$fem2[top_tier$year >= 2001 & top_tier$year < 2009], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 2001 & top_tier$year < 2009], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 2009, x1 = 2017, y0 = mean(top_tier$fem2[top_tier$year >= 2009 & top_tier$year < 2017], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 2009 & top_tier$year < 2017], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 2017, x1 = 2021, y0 = mean(top_tier$fem2[top_tier$year >= 2017 & top_tier$year < 2021], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 2017 & top_tier$year < 2021], na.rm = T), col = t_col("red", percent = 70), lwd = 2)


lines(res.all$year, res.all$pct_female, lty = 2, lwd = 2)
lines(res$year, res$fem2.mean, lwd = 2)

text(x = 1974, y = .5, labels = "Rep", col = "red", cex = .7)
text(x = 1979, y = .5, labels = "Dem", col = "blue", cex = .7)
text(x = 1987, y = .5, labels = "Rep", col = "red", cex = .7)
text(x = 1997, y = .5, labels = "Dem", col = "blue", cex = .7)
text(x = 2005, y = .5, labels = "Rep", col = "red", cex = .7)
text(x = 2013, y = .5, labels = "Dem", col = "blue", cex = .7)
text(x = 2018.8, y = .5, labels = "Rep", col = "red", cex = .7)

legend(
  x = 1972, y = .25, legend = rev(c("Top-Tier Posts", "Rank and File", "U.S. Workforce")),
  lty = c(3, 2, 1), lwd = c(2, 2, 2), col = c("red", "black", "black"), cex = .8, bg = "white"
)

dev.off()




## Figure A1 -------------------------------------------------
# re-do using only common positions


# Figure A1
pdf(file = "Accepted Paper Replication Archive/figures/figure_a1.pdf")

plot(res$year, res$fem2.mean_common_positions,
  type = "l", xlab = "Year", ylab = "% Women",
  axes = F, main = "",
  ylim = c(0, .55), lwd = 2, xlim = c(1973, 2021)
)
abline(v = c(1977, 1981, 1993, 2001, 2009, 2017, 2021), lty = 1, col = "grey")
axis(1, at = seq(1970, 2022, by = 4), cex.axis = .7)
axis(2, at = seq(0, .6, by = .1), las = 2, cex = .7, labels = seq(0, .6, by = .1) * 100)
## plot female share of US workforce
lines(workforce_gender$year, workforce_gender$share_women2, lty = 3, lwd = 2, col = "red")

## plot admin means
segments(x0 = 1973, x1 = 1977, y0 = mean(top_tier$fem2[top_tier$year >= 1973 & top_tier$year < 1977 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1973 & top_tier$year < 1977 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 1977, x1 = 1981, y0 = mean(top_tier$fem2[top_tier$year >= 1977 & top_tier$year < 1981 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1977 & top_tier$year < 1981 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 1981, x1 = 1993, y0 = mean(top_tier$fem2[top_tier$year >= 1981 & top_tier$year < 1993 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1981 & top_tier$year < 1993 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 1993, x1 = 2001, y0 = mean(top_tier$fem2[top_tier$year >= 1993 & top_tier$year < 2001 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 1993 & top_tier$year < 2001 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 2001, x1 = 2009, y0 = mean(top_tier$fem2[top_tier$year >= 2001 & top_tier$year < 2009 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 2001 & top_tier$year < 2009 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 2009, x1 = 2017, y0 = mean(top_tier$fem2[top_tier$year >= 2009 & top_tier$year < 2017 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 2009 & top_tier$year < 2017 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)
segments(x0 = 2017, x1 = 2021, y0 = mean(top_tier$fem2[top_tier$year >= 2017 & top_tier$year < 2021 & top_tier$always == T], na.rm = T), y1 = mean(top_tier$fem2[top_tier$year >= 2017 & top_tier$year < 2021 & top_tier$always == T], na.rm = T), col = t_col("red", percent = 70), lwd = 2)


lines(res.all$year, res.all$pct_female, lty = 2, lwd = 2)
lines(temp$year, temp$fem2.mean_common_positions, lwd = 2)

text(x = 1974, y = .5, labels = "Rep", col = "red", cex = .7)
text(x = 1979, y = .5, labels = "Dem", col = "blue", cex = .7)
text(x = 1987, y = .5, labels = "Rep", col = "red", cex = .7)
text(x = 1997, y = .5, labels = "Dem", col = "blue", cex = .7)
text(x = 2005, y = .5, labels = "Rep", col = "red", cex = .7)
text(x = 2013, y = .5, labels = "Dem", col = "blue", cex = .7)
text(x = 2018.8, y = .5, labels = "Rep", col = "red", cex = .7)

legend(
  x = 1972, y = .30, legend = rev(c("Top-Tier Posts", "Rank and File", "U.S. Workforce")),
  lty = c(3, 2, 1), lwd = c(2, 2, 2), col = c("red", "black", "black"), cex = .8, bg = "white"
)

dev.off()

# -------------------------------------------------------------------------


